16. Quiz: Mini-Batch Gradient Descent

In this quiz, you'll be given the sample dataset below (as data.csv), and your goal is to complete the MSEStep function, which performs the gradient descent step that mini-batch gradient descent uses to find a best-fitting regression line. You might consider looking into numpy's matmul function for this!

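If you haven't used np.matmul before, here is a minimal sketch (with made-up values) of the matrix product it handles in this quiz: for X of shape (n_points, n_features) and coefficients W of shape (n_features,), np.matmul(X, W) returns one predicted value per data point.

import numpy as np

X = np.array([[ 0.5],
              [-1.2],
              [ 2.0]])      # (3, 1): three points, one predictor feature
W = np.array([0.8])         # (1,): one coefficient per feature
b = 0.1                     # intercept

y_pred = np.matmul(X, W) + b    # shape (3,): one prediction per point
# equivalently: y_pred = X @ W + b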
Start Quiz:

import numpy as np
# Setting a random seed, feel free to change it and see different solutions.
np.random.seed(42)


# TODO: Fill in code in the function below to implement a gradient descent
# step for linear regression, following a squared error rule. See the docstring
# for parameters and returned variables.
def MSEStep(X, y, W, b, learn_rate = 0.005):
    """
    This function implements the gradient descent step for squared error as a
    performance metric.
    
    Parameters
    X : array of predictor features
    y : array of outcome values
    W : predictor feature coefficients
    b : regression function intercept
    learn_rate : learning rate

    Returns
    W_new : predictor feature coefficients following gradient descent step
    b_new : intercept following gradient descent step
    """
    
    # Fill in code
    
    return W_new, b_new


# The parts of the script below will be run when you press the "Test Run"
# button. The gradient descent step will be performed multiple times on
# the provided dataset, and the returned list of regression coefficients
# will be plotted.
def miniBatchGD(X, y, batch_size = 20, learn_rate = 0.005, num_iter = 25):
    """
    This function performs mini-batch gradient descent on a given dataset.

    Parameters
    X : array of predictor features
    y : array of outcome values
    batch_size : how many data points will be sampled for each iteration
    learn_rate : learning rate
    num_iter : number of batches used

    Returns
    regression_coef : list of arrays holding the slope and intercept after each
      gradient descent step
    """
    n_points = X.shape[0]
    W = np.zeros(X.shape[1]) # coefficients
    b = 0 # intercept
    
    # run iterations
    regression_coef = [np.hstack((W,b))]
    for _ in range(num_iter):
        # sample batch_size point indices (with replacement) for this iteration
        batch = np.random.choice(range(n_points), batch_size)
        X_batch = X[batch,:]
        y_batch = y[batch]
        W, b = MSEStep(X_batch, y_batch, W, b, learn_rate)
        regression_coef.append(np.hstack((W,b)))
    
    return regression_coef


if __name__ == "__main__":
    # perform gradient descent
    data = np.loadtxt('data.csv', delimiter = ',')
    X = data[:,:-1]
    y = data[:,-1]
    regression_coef = miniBatchGD(X, y)
    
    # plot the results
    import matplotlib.pyplot as plt
    
    plt.figure()
    X_min = X.min()
    X_max = X.max()
    counter = len(regression_coef)
    for W, b in regression_coef:
        counter -= 1
        # earlier fits are drawn in lighter gray; the final fit is black
        color = [1 - 0.92 ** counter for _ in range(3)]
        plt.plot([X_min, X_max],[X_min * W + b, X_max * W + b], color = color)
    plt.scatter(X, y, zorder = 3)
    plt.show()
data.csv:

-0.72407,2.23863
-2.40724,-0.00156
2.64837,3.01665
0.36092,2.31019
0.67312,2.05950
-0.45460,1.24736
2.20168,2.82497
1.15605,2.21802
0.50694,1.43644
-0.85952,1.74980
-0.59970,1.63259
1.46804,2.43461
-1.05659,1.02226
1.29177,3.11769
-0.74565,0.81194
0.15033,2.81910
-1.49627,0.53105
-0.72071,1.64845
0.32924,1.91416
-0.28053,2.11376
-1.36115,1.70969
0.74678,2.92253
0.10621,3.29827
0.03256,1.58565
-0.98290,2.30455
-1.15661,1.79169
0.09024,1.54723
-1.03816,1.06893
-0.00604,1.78802
0.16278,1.84746
-0.69869,1.58732
1.03857,1.94799
-0.11783,3.09324
-0.95409,1.86155
-0.81839,1.88817
-1.28802,1.39474
0.62822,1.71526
-2.29674,1.75695
-0.85601,1.12981
-1.75223,1.67000
-1.19662,0.66711
0.97781,3.11987
-1.17110,0.56924
0.15835,2.28231
-0.58918,1.23798
-1.79678,1.35803
-0.95727,1.75579
0.64556,1.91470
0.24625,2.33029
0.45917,3.25263
1.21036,2.07602
-0.60116,1.54254
0.26851,2.79202
0.49594,1.96178
-2.67877,0.95898
0.49402,1.96690
1.18643,3.06144
-0.17741,1.85984
0.57938,1.82967
-2.14926,0.62285
2.27700,3.63838
-1.05695,1.11807
1.68288,2.91735
-1.53513,1.99668
0.00099,1.76149
0.45520,2.31938
-0.37855,0.90172
1.35638,3.49432
0.01763,1.87838
2.21725,2.61171
-0.44442,2.06623
0.89583,3.04041
1.30499,2.42824
0.10883,0.63190
1.79466,2.95265
-0.00733,1.87546
0.79862,3.44953
-0.12353,1.53740
-1.34999,1.59958
-0.67825,1.57832
-0.17901,1.73312
0.12577,2.00244
1.11943,2.08990
-3.02296,1.09255
0.64965,1.28183
1.05994,2.32358
0.53360,1.75136
-0.73591,1.43076
-0.09569,2.81376
1.04694,2.56597
0.46511,2.36401
-0.75463,2.30161
-0.94159,1.94500
-0.09314,1.87619
-0.98641,1.46602
-0.92159,1.21538
0.76953,2.39377
0.03283,1.55730
-1.07619,0.70874
0.20174,1.76894
Solution:

def MSEStep(X, y, W, b, learn_rate = 0.001):
    """
    This function implements the gradient descent step for squared error as a
    performance metric.
    
    Parameters
    X : array of predictor features
    y : array of outcome values
    W : predictor feature coefficients
    b : regression function intercept
    learn_rate : learning rate

    Returns
    W_new : predictor feature coefficients following gradient descent step
    b_new : intercept following gradient descent step
    """
    
    # compute errors
    y_pred = np.matmul(X, W) + b
    error = y - y_pred
    
    # step against the gradient of the squared error
    # (the constant factor of 2 is absorbed into the learning rate)
    W_new = W + learn_rate * np.matmul(error, X)
    b_new = b + learn_rate * error.sum()
    return W_new, b_new
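For reference, here is a sketch of where the solution's update rule comes from (a standard squared-error derivation, not part of the original quiz text). With predictions \hat{y}_i = W x_i + b, the batch squared error and its gradients are

E = \sum_i \left( y_i - \hat{y}_i \right)^2
\frac{\partial E}{\partial W} = -2 \sum_i \left( y_i - \hat{y}_i \right) x_i
\frac{\partial E}{\partial b} = -2 \sum_i \left( y_i - \hat{y}_i \right)

Stepping against the gradient gives W_new = W + learn_rate * np.matmul(error, X) and b_new = b + learn_rate * error.sum(), with the constant factor of 2 absorbed into the learning rate.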